Visualization for Anime

In [3]:
import pandas as pd
import warnings
import ast
import plotly.graph_objs as go
from plotly.subplots import make_subplots
import numpy as np
import plotly.express as px
import matplotlib.pyplot as plt
from matplotlib.offsetbox import OffsetImage, AnnotationBbox
from PIL import Image
warnings.filterwarnings('ignore')
In [26]:
# Read the anime dataset (title, genre, episodes, popularity, score) from CSV.
df = pd.read_csv("../../data/csv/anime_data.csv")
In [27]:
# Preview the freshly loaded frame.
df
Out[27]:
Unnamed: 0 title genre episodes popularity score
0 0 "0" ['Music'] 1.0 7345.0 4.77
1 1 "Aesop" no Ohanashi yori: Ushi to Kaeru, Yokub... ['Kids'] 1.0 12413.0 5.61
2 2 "Bungaku Shoujo" Kyou no Oyatsu: Hatsukoi ['Comedy', 'Fantasy', 'School'] 1.0 3466.0 6.96
3 3 "Bungaku Shoujo" Memoire ['Drama', 'Romance', 'School'] 3.0 2943.0 7.40
4 4 "Bungaku Shoujo" Movie ['Mystery', 'Drama', 'Romance', 'School'] 1.0 1799.0 7.48
... ... ... ... ... ... ...
15868 15868 xxxHOLiC Rou ['Mystery', 'Supernatural'] 2.0 1869.0 8.21
15869 15869 xxxHOLiC Shunmuki ['Mystery', 'Comedy', 'Psychological', 'Supern... 2.0 1813.0 8.13
15870 15870 Üks Uks ['Dementia'] 1.0 14107.0 5.71
15871 15871 ēlDLIVE ['Action', 'Sci-Fi', 'Space', 'Police', 'Shoun... 12.0 2260.0 6.23
15872 15872 ◯ ['Dementia', 'Music'] 1.0 8820.0 3.80

15873 rows × 6 columns

In [28]:
# Discard the leftover "Unnamed: 0" column that came in with the CSV.
# Rebinding (instead of inplace=True) together with errors="ignore" keeps this
# cell safe to re-run: a second execution no longer raises KeyError.
df = df.drop(columns=["Unnamed: 0"], errors="ignore")
In [29]:
# Keep only the rows whose 'episodes' value is not the "Not specified" sentinel.
# .copy() makes df_new an independent frame, so downstream column edits never
# target a filtered selection of df.
df_new = df[df["episodes"] != "Not specified"].copy()
In [30]:
# Preview the filtered frame.
df_new
Out[30]:
title genre episodes popularity score
0 "0" ['Music'] 1.0 7345.0 4.77
1 "Aesop" no Ohanashi yori: Ushi to Kaeru, Yokub... ['Kids'] 1.0 12413.0 5.61
2 "Bungaku Shoujo" Kyou no Oyatsu: Hatsukoi ['Comedy', 'Fantasy', 'School'] 1.0 3466.0 6.96
3 "Bungaku Shoujo" Memoire ['Drama', 'Romance', 'School'] 3.0 2943.0 7.40
4 "Bungaku Shoujo" Movie ['Mystery', 'Drama', 'Romance', 'School'] 1.0 1799.0 7.48
... ... ... ... ... ...
15868 xxxHOLiC Rou ['Mystery', 'Supernatural'] 2.0 1869.0 8.21
15869 xxxHOLiC Shunmuki ['Mystery', 'Comedy', 'Psychological', 'Supern... 2.0 1813.0 8.13
15870 Üks Uks ['Dementia'] 1.0 14107.0 5.71
15871 ēlDLIVE ['Action', 'Sci-Fi', 'Space', 'Police', 'Shoun... 12.0 2260.0 6.23
15872 ◯ ['Dementia', 'Music'] 1.0 8820.0 3.80

15611 rows × 5 columns

In [31]:
# Parse each stringified genre list (e.g. "['Drama', 'Romance']") into a real
# Python list, then emit one row per (title, genre) pair.
# assign() builds the parsed column on a new frame instead of writing into
# df_new (a filtered selection of df). That avoids chained-assignment warnings
# and keeps the cell re-runnable: df_new['genre'] stays a string column, so
# ast.literal_eval never receives an already-parsed list on a second run.
df_exploded = (
    df_new
    .assign(genre=df_new['genre'].apply(ast.literal_eval))
    .explode('genre')
)
In [32]:
# Broad categories and the fine-grained genres each one absorbs.
# Insertion order matters: a genre listed under more than one category
# ('Samurai', 'Shoujo', 'Yaoi') always resolves to the first category that
# lists it, so its later occurrences never take effect.
genre_categories = {
    'Action and Adventure': ['Action', 'Adventure', 'Super Power', 'Martial Arts', 'Samurai', 'Military'],
    'Fantasy and Supernatural': ['Fantasy', 'Supernatural', 'Magic', 'Vampire', 'Demons'],
    'Science Fiction and Technology': ['Sci-Fi', 'Mecha', 'Space', 'Cars'],
    'Comedy and Slice of Life': ['Comedy', 'Slice of Life', 'Parody'],
    'Drama and Romance': ['Drama', 'Romance', 'Shoujo', 'Shounen Ai', 'Josei', 'Shoujo Ai', 'Yaoi'],
    'Horror and Thriller': ['Horror', 'Thriller', 'Psychological', 'Mystery'],
    'Historical and Cultural': ['Historical', 'Samurai', 'Seinen'],
    'Youth and School Life': ['School', 'Shounen', 'Kids', 'Shoujo'],
    'Special Interests': ['Ecchi', 'Harem', 'Hentai', 'Yaoi', 'Yuri'],
    'Miscellaneous': ['Music', 'Sports', 'Game', 'Police', 'Dementia']
}


def map_genre_to_category(genre):
    """Return the broad category that ``genre`` belongs to.

    Categories are scanned in ``genre_categories`` order and the first one
    whose genre list contains ``genre`` wins. A value that appears in no
    list falls back to 'Miscellaneous'.
    """
    matching_categories = (
        category
        for category, members in genre_categories.items()
        if genre in members
    )
    return next(matching_categories, 'Miscellaneous')

# Tag every exploded row with the broad category of its genre.
df_exploded['category'] = df_exploded['genre'].map(map_genre_to_category)
In [33]:
# Strip every character that is neither a word character nor whitespace from
# the titles (e.g. "Steins;Gate" becomes "SteinsGate").
# NOTE(review): titles that differ only by punctuation collapse to the same
# string and are merged by the following groupby — confirm this is intended.
df_exploded = df_exploded.assign(
    title=df_exploded['title'].str.replace(r"[^\w\s]", '', regex=True)
)
In [34]:
# Collapse to one row per (category, title), averaging popularity and score
# over the rows that share that pair.
df_exploded = (
    df_exploded
    .groupby(["category", "title"], as_index=False)[["popularity", "score"]]
    .mean()
)
In [35]:
# For every category keep the ten rows with the largest score and, separately,
# the ten rows with the largest popularity value.
# NOTE(review): if 'popularity' is a rank (1 = most popular), nlargest picks
# the least popular titles — confirm the column's meaning.
top_score = (
    df_exploded
    .groupby('category', as_index=False)
    .apply(lambda group: group.nlargest(n=10, columns='score'))
)
top_popularity = (
    df_exploded
    .groupby('category', as_index=False)
    .apply(lambda group: group.nlargest(n=10, columns='popularity'))
)
In [36]:
# Replace the index produced by the per-category apply with a fresh 0..n-1 range.
top_score = top_score.reset_index(drop=True)
In [37]:
# One row per (category, title) carrying its mean score.
top_score_final = (
    top_score
    .groupby(["category", "title"], as_index=False)["score"]
    .mean()
)
In [38]:
# Preview the per-category top-scored titles.
top_score_final
Out[38]:
category title score
0 Action and Adventure Code Geass Hangyaku no Lelouch R2 8.93
1 Action and Adventure Fullmetal Alchemist Brotherhood 9.23
2 Action and Adventure Ginga Eiyuu Densetsu 9.03
3 Action and Adventure Gintama 9.00
4 Action and Adventure Gintama Enchousen 8.98
... ... ... ...
95 Youth and School Life Hunter x Hunter 2011 9.11
96 Youth and School Life Kimi no Na wa 9.09
97 Youth and School Life Koe no Katachi 9.01
98 Youth and School Life Quiz de Manabu Pinocchio no Koutsuu Ansen 9.08
99 Youth and School Life Shingeki no Kyojin Season 3 Part 2 9.07

100 rows × 3 columns

In [39]:
# Replace the index produced by the per-category apply with a fresh 0..n-1 range.
top_popularity = top_popularity.reset_index(drop=True)
In [40]:
# One row per (category, title) carrying its mean popularity value.
top_popularity_final = (
    top_popularity
    .groupby(["category", "title"], as_index=False)["popularity"]
    .mean()
)
In [41]:
# Preview the per-category rows selected by popularity value.
top_popularity_final
Out[41]:
category title popularity
0 Action and Adventure Boy General 16308.0
1 Action and Adventure Jumbagi Hanbandoui gongryong 3D 16238.0
2 Action and Adventure Mabeob Chunjamun Season 2 16305.0
3 Action and Adventure Miniforce 16166.0
4 Action and Adventure Ninja Tamamaru no Hi no Youjin 16225.0
... ... ... ...
95 Youth and School Life Robot Trains 16315.0
96 Youth and School Life Usagi no Mofy TV 2014 16317.0
97 Youth and School Life Usagi no Mofy TV 2016 16319.0
98 Youth and School Life Watashi no Ningyou 16310.0
99 Youth and School Life Watashi to Piano 16313.0

100 rows × 3 columns

In [42]:
# Horizontal bar chart of the top-scored titles, one trace per category, with
# a dropdown that switches which category is shown.
fig = make_subplots()

# Categories in the order they appear in top_score_final; the first one is the
# chart's initial view.
categories = top_score_final['category'].unique()
initial_category = categories[0] if len(categories) else ""

# One bar trace per category; only the first trace starts visible.
for position, category in enumerate(categories):
    # sort_values returns a new frame, so the filtered selection is never
    # mutated in place.
    category_df = top_score_final[top_score_final['category'] == category].sort_values(by='score')
    fig.add_trace(
        go.Bar(y=category_df['title'], x=category_df['score'], name=category,
               orientation='h', visible=(position == 0), marker_color='#df6200')  # dark-orange bars
    )

# One dropdown entry per category: show only that category's trace and update
# the figure title, using the same wording as the initial title.
buttons = [
    dict(
        label=category,
        method="update",
        args=[{"visible": [other == position for other in range(len(categories))]},
              {"title": f"Top Rated Anime by Genre: {category}"}],
    )
    for position, category in enumerate(categories)
]

# Dropdown placement, axis titles, and a fixed x-axis window of 8 to 10.
fig.update_layout(
    updatemenus=[dict(
        active=0,  # matches the trace that starts visible
        buttons=buttons,
        direction="down",
        pad={"r": 10, "t": 10},
        showactive=True,
        x=0.77,
        xanchor="left",
        y=1.25,
        yanchor="top"
    )],
    title=f"Top Rated Anime by Genre: {initial_category}",
    xaxis_title="Score",
    yaxis_title="Title",
    xaxis=dict(range=[8, 10])
)

# Export a standalone HTML copy, then render inline.
fig.write_html("../../data/plots/top_animes_score.html")
fig.show()
In [43]:
# Horizontal bar chart of the titles selected by popularity value, one trace
# per category, with a dropdown that switches which category is shown.
# NOTE(review): the rows come from top_popularity_final, which keeps the
# largest popularity values; if popularity is a rank these are the least
# popular titles — confirm before presenting as "most popular".
fig = make_subplots()

# Categories in the order they appear in top_popularity_final; the first one is
# the chart's initial view.
categories = top_popularity_final['category'].unique()
initial_category = categories[0] if len(categories) else ""

# One bar trace per category; only the first trace starts visible.
for position, category in enumerate(categories):
    # sort_values returns a new frame, so the filtered selection is never
    # mutated in place.
    category_df = top_popularity_final[top_popularity_final['category'] == category].sort_values(by='popularity')
    fig.add_trace(
        go.Bar(y=category_df['title'], x=category_df['popularity'], name=category,
               orientation='h', visible=(position == 0), marker_color='#ccb716')  # mustard-yellow bars
    )

# One dropdown entry per category: show only that category's trace and update
# the figure title.
buttons = [
    dict(
        label=category,
        method="update",
        args=[{"visible": [other == position for other in range(len(categories))]},
              {"title": f"Most Popular Anime by Genre: {category}"}],
    )
    for position, category in enumerate(categories)
]

# Dropdown placement, axis titles, and a fixed x-axis window.
fig.update_layout(
    updatemenus=[dict(
        active=0,  # matches the trace that starts visible
        buttons=buttons,
        direction="down",
        pad={"r": 10, "t": 10},
        showactive=True,
        x=0.77,
        xanchor="left",
        y=1.25,
        yanchor="top"
    )],
    title=f"Most Popular Anime by Genre: {initial_category}",
    title_x=0.5,
    xaxis_title="Popularity",  # the x values are popularity, not score
    yaxis_title="Title",
    xaxis=dict(range=[13500, 16500])
)

# Export a standalone HTML copy, then render inline.
fig.write_html("../../data/plots/top_animes_popularity.html")
fig.show()
In [4]:
# Load the suggested-anime table (anime_name, japanese_title, count, matched_score).
suggested_anime = pd.read_csv("../../data/csv/top20_animes_with_scores.csv")
In [45]:
# Show the suggested-anime table.
suggested_anime
Out[45]:
anime_name japanese_title count matched_score
0 Steins Gate Steins;Gate 19606 9.11
1 Attack on Titan Shingeki no Kyojin 16722 8.47
2 One Piece Wan Pisu 10936 8.53
3 Violet Evergarden Vaioretto Evagaden 8780 8.62
4 Made in Abyss Meido in Abisu 8558 8.83
5 Monster Monsuta 8543 8.69
6 Clannad Kuranado 7461 8.16
7 Zero Zero 6724 5.98
8 Death Note Desu Noto 6559 8.65
9 Code Geass Code Geass 6169 7.49
10 Cowboy Bebop Cowboy Bebop 5758 8.81
11 Horimiya Horimiya 5710 5.30
12 Gintama Gintama 5654 8.97
13 Berserk Berserk 5485 6.60
14 Dororo Dororo 5053 8.23
15 Vinland Saga Vinland Saga 5043 8.78
16 Overlord Overlord 4887 8.05
17 Naruto Naruto 4778 7.93
18 Death Parade Death Parade 4663 8.22
19 Mob Psycho Mob Psycho 4621 8.51
In [5]:
# Polar bar chart of the ten rows of suggested_anime with the highest 'count'.
df = suggested_anime.nlargest(10, 'count')

# One bar per anime, at evenly spaced angles from 0 up to (but excluding) 360.
num_vars = len(df['anime_name'])
angles = np.linspace(0, 360, num_vars, endpoint=False).tolist()

# Every bar is its own trace: radius = count, hover shows the title and the
# score formatted to two decimals.
bar_traces = [
    go.Barpolar(
        r=[value],
        theta=[angle],
        width=18,
        name=name,
        marker_color='#dc8d23',
        text=f"{score:.2f}",
        hoverinfo="name+text",
    )
    for angle, value, score, name in zip(
        angles, df['count'], df['matched_score'], df['japanese_title']
    )
]

fig = go.Figure()
fig.add_traces(bar_traces)

# Axis styling for the polar plot.
polar_settings = dict(
    radialaxis=dict(
        visible=True,
        range=[0, df['count'].max()],
        gridcolor='#648b9b',  # blue-grey radial gridlines
    ),
    angularaxis=dict(
        tickvals=angles,
        ticktext=df['japanese_title'],  # label each bar with its title
        gridcolor='rgba(0,0,0,0)',  # fully transparent angular gridlines
    ),
    radialaxis_linecolor='gray',
    angularaxis_linecolor='gray',
)

fig.update_layout(
    title='Most Suggested Anime on Reddit',
    title_x=0.5,
    polar=polar_settings,
    paper_bgcolor='white',
    plot_bgcolor='rgba(0,0,0,0)',
    showlegend=False,
    width=800,
    height=800,
)

# White background inside the polar area.
fig.update_polars(bgcolor='white')

# Export a standalone HTML copy, then render inline.
fig.write_html("../../data/plots/top_anime_suggested.html")
fig.show()
In [6]:
# Lollipop chart of matched_score for the ten rows of suggested_anime with the
# highest 'count', ordered by count (highest first).
data = suggested_anime.nlargest(10, 'count')
df = data.sort_values(by='count', ascending=False)

# Load the icon used as the lollipop head. The context manager closes the file
# handle; copy() keeps the pixel data available after the file is closed.
icon_path = 'review-icon.png'  # resolved against the current working directory
with Image.open(icon_path) as icon_file:
    movie_icon = icon_file.copy()

fig, ax = plt.subplots(figsize=(18, 4.2))

for title, rating in zip(df['japanese_title'], df['matched_score']):
    # Stick: vertical line from the axis floor (5) up to the rating.
    ax.plot([title, title], [5, rating], color='#dc8d23', linewidth=5)

    # Head: each point gets its own OffsetImage, because a Matplotlib artist
    # should not be shared between several AnnotationBbox containers.
    imagebox = OffsetImage(movie_icon, zoom=0.03)
    ab = AnnotationBbox(imagebox, (title, rating), frameon=False)
    ax.add_artist(ab)

# Titles, labels and a y-axis window of 5 to 10 on a plain white background.
ax.set_title('Review Ratings for Top Suggested Anime')
ax.set_xlabel('Anime Title')
ax.set_ylabel('Anime Rating')
ax.set_ylim(5, 10)
ax.set_facecolor('white')
ax.grid(False)

# Save a 300-dpi PNG before showing the figure.
plt.savefig('../../data/plots/review_rating_of_top_anime.png', dpi =300)

plt.show()
In [ ]:
# Re-display the suggested-anime table for reference.
suggested_anime
Out[ ]:
anime_name japanese_title count matched_score
0 Steins Gate Steins;Gate 19606 9.11
1 Attack on Titan Shingeki no Kyojin 16722 8.47
2 One Piece Wan Pisu 10936 8.53
3 Violet Evergarden Vaioretto Evagaden 8780 8.62
4 Made in Abyss Meido in Abisu 8558 8.83
5 Monster Monsuta 8543 8.69
6 Clannad Kuranado 7461 8.16
7 Zero Zero 6724 5.98
8 Death Note Desu Noto 6559 8.65
9 Code Geass Code Geass 6169 7.49
10 Cowboy Bebop Cowboy Bebop 5758 8.81
11 Horimiya Horimiya 5710 5.30
12 Gintama Gintama 5654 8.97
13 Berserk Berserk 5485 6.60
14 Dororo Dororo 5053 8.23
15 Vinland Saga Vinland Saga 5043 8.78
16 Overlord Overlord 4887 8.05
17 Naruto Naruto 4778 7.93
18 Death Parade Death Parade 4663 8.22
19 Mob Psycho Mob Psycho 4621 8.51
In [ ]:
# Re-display the per-category top-scored titles for reference.
top_score_final
Out[ ]:
category title score
0 Action and Adventure Code Geass Hangyaku no Lelouch R2 8.93
1 Action and Adventure Fullmetal Alchemist Brotherhood 9.23
2 Action and Adventure Ginga Eiyuu Densetsu 9.03
3 Action and Adventure Gintama 9.00
4 Action and Adventure Gintama Enchousen 8.98
... ... ... ...
95 Youth and School Life Hunter x Hunter 2011 9.11
96 Youth and School Life Kimi no Na wa 9.09
97 Youth and School Life Koe no Katachi 9.01
98 Youth and School Life Quiz de Manabu Pinocchio no Koutsuu Ansen 9.08
99 Youth and School Life Shingeki no Kyojin Season 3 Part 2 9.07

100 rows × 3 columns

In [ ]:
# Collapse top_score_final to one row per title: gather every category the
# title appears under into a list and keep the first score listed for it, then
# expose the columns as Title / Genre / Score.
grouped_top_score_final = (
    top_score_final
    .groupby('title')
    .agg({'category': list, 'score': 'first'})
    .reset_index()
    .rename(columns={'category': 'Genre', 'title': 'Title', 'score': 'Score'})
)

# Ten highest-scoring titles overall, best first.
top_10_score_grouped = (
    grouped_top_score_final
    .sort_values(by='Score', ascending=False)
    .head(10)
)
top_10_score_grouped
Out[ ]:
Title Genre Score
6 Fullmetal Alchemist Brotherhood [Action and Adventure, Comedy and Slice of Lif... 9.23
18 Hunter x Hunter 2011 [Action and Adventure, Fantasy and Supernatura... 9.11
47 SteinsGate [Horror and Thriller, Science Fiction and Tech... 9.11
23 Kimi no Na wa [Drama and Romance, Fantasy and Supernatural, ... 9.09
37 Quiz de Manabu Pinocchio no Koutsuu Ansen [Youth and School Life] 9.08
41 Shingeki no Kyojin Season 3 Part 2 [Action and Adventure, Drama and Romance, Fant... 9.07
7 Ginga Eiyuu Densetsu [Action and Adventure, Drama and Romance, Scie... 9.03
0 3gatsu no Lion 2nd Season [Comedy and Slice of Life, Drama and Romance, ... 9.02
25 Koe no Katachi [Drama and Romance, Youth and School Life] 9.01
8 Gintama [Action and Adventure, Comedy and Slice of Lif... 9.00
In [69]:
# Re-order the ten titles so the lowest score comes first.
top_10_score_grouped = top_10_score_grouped.sort_values(by='Score', ascending=True)

# Three-stop colour scale from light to saturated orange.
custom_color_scale = [
    [0.0, '#ffba9d'],
    [0.5, '#ff8659'],
    [1.0, '#ff4301']
]

# Text drawn on each bar: title and score on separate lines.
bar_labels = (
    "Title: " + top_10_score_grouped["Title"]
    + "<br>Score: " + top_10_score_grouped["Score"].astype(str)
)

# Horizontal bars coloured by score; hovering shows only the Genre list.
fig = px.bar(
    top_10_score_grouped,
    x="Score",
    y="Title",
    color="Score",
    text=bar_labels,
    hover_data={'Title': False, 'Score': False, 'Genre': True},
    orientation="h",
    color_continuous_scale=custom_color_scale,
)

fig.update_layout(
    title="Top rated anime across all genre",
    xaxis_title="Score",  # x-axis keeps its title
    yaxis_title="",  # y-axis title is blanked
    xaxis_showticklabels=True,  # x tick labels stay visible
    yaxis_showticklabels=False,  # y tick labels are hidden; titles are in the bar text
    showlegend=True,
    plot_bgcolor='white',
    barmode='stack',
    height=600,
    xaxis=dict(range=[8.5, 9.3]),  # fixed x-axis window
)

# Export a standalone HTML copy, then render inline.
fig.write_html('../../data/plots/top_rated_anime_all_genre.html')

fig.show()
In [51]:
# Keep the (category, title) rows whose score is strictly above 9.
top_10_scores_df = top_score_final.loc[top_score_final['score'].gt(9)]

# Show the filtered rows.
top_10_scores_df
Out[51]:
category title score
1 Action and Adventure Fullmetal Alchemist Brotherhood 9.23
2 Action and Adventure Ginga Eiyuu Densetsu 9.03
6 Action and Adventure Hunter x Hunter 2011 9.11
9 Action and Adventure Shingeki no Kyojin Season 3 Part 2 9.07
10 Comedy and Slice of Life 3gatsu no Lion 2nd Season 9.02
12 Comedy and Slice of Life Fullmetal Alchemist Brotherhood 9.23
20 Drama and Romance 3gatsu no Lion 2nd Season 9.02
23 Drama and Romance Fullmetal Alchemist Brotherhood 9.23
24 Drama and Romance Ginga Eiyuu Densetsu 9.03
26 Drama and Romance Kimi no Na wa 9.09
27 Drama and Romance Koe no Katachi 9.01
29 Drama and Romance Shingeki no Kyojin Season 3 Part 2 9.07
31 Fantasy and Supernatural Fullmetal Alchemist Brotherhood 9.23
32 Fantasy and Supernatural Hunter x Hunter 2011 9.11
35 Fantasy and Supernatural Kimi no Na wa 9.09
39 Fantasy and Supernatural Shingeki no Kyojin Season 3 Part 2 9.07
40 Historical and Cultural 3gatsu no Lion 2nd Season 9.02
58 Horror and Thriller Shingeki no Kyojin Season 3 Part 2 9.07
59 Horror and Thriller SteinsGate 9.11
60 Miscellaneous 3gatsu no Lion 2nd Season 9.02
73 Science Fiction and Technology Ginga Eiyuu Densetsu 9.03
79 Science Fiction and Technology SteinsGate 9.11
90 Youth and School Life Fullmetal Alchemist Brotherhood 9.23
95 Youth and School Life Hunter x Hunter 2011 9.11
96 Youth and School Life Kimi no Na wa 9.09
97 Youth and School Life Koe no Katachi 9.01
98 Youth and School Life Quiz de Manabu Pinocchio no Koutsuu Ansen 9.08
99 Youth and School Life Shingeki no Kyojin Season 3 Part 2 9.07
In [70]:
# Dot plot showing, for each title in top_10_scores_df, the categories it is
# listed under. plotly.express is already imported as px in the notebook's
# import cell, so it is not re-imported here.

# Order the rows by ascending score before plotting.
dotdf_filter = top_10_scores_df.sort_values(by='score', ascending=True)

fig = px.scatter(
    dotdf_filter,
    y="title",
    x="category",
    color_discrete_sequence=['#ff4301'],
    hover_data={'title': True, 'score': True, 'category': True},
)
fig.update_traces(marker_size=10)
fig.update_layout(
    title="Genre of top rated anime",
    plot_bgcolor='white',
    yaxis=dict(
        side='left',  # y-axis on the left side
        title='Title',  # y-axis title
        gridcolor='lightgray'
    ),
    xaxis=dict(
        title='Genre',  # x-axis title
    )
)

# Export a standalone HTML copy, then render inline.
fig.write_html('../../data/plots/genre_top_rated_anime.html')

fig.show()